import sys
from urllib.parse import urljoin

import requests
from lxml import etree
# Page whose tables are scraped below.
url = 'http://kaoyan.eol.cn/html/ky/09blbph/'
# Browser-like User-Agent sent with the request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page as data.
response.raise_for_status()

# Decode the raw body bytes as UTF-8 directly. Round-tripping response.text
# through ISO-8859-1 only works when requests happened to guess ISO-8859-1;
# with any other guessed encoding the encode() step fails on non-Latin-1 text.
string = response.content.decode('utf-8')
print(type(string))
element = etree.HTML(string)
# All rows of every table in the document.
schools = element.xpath('//table//tr')
print(len(schools))
# For each table row, print the text of the first cell and then, for the
# years 2018 down to 2014 (cells 2 through 6), the cell text and its link.
for school in schools:
    name = school.xpath('.//td[1]//text()')
    if not name:
        # No text in a first <td>: nothing to report for this row.
        continue
    print(name[0])
    for year_int in range(2018, 2013, -1):
        # Column index for the year: 2018 -> td[2], ..., 2014 -> td[6].
        td_int = 2020 - year_int
        td_str = './/td[' + str(td_int) + ']//'
        scoreyear = school.xpath(td_str + 'text()')
        if not scoreyear:
            continue
        print(scoreyear[0])
        if scoreyear[0] == '-':
            # '-' is treated as "no entry for this year": no link to print.
            continue
        hrefs = school.xpath(td_str + '@href')
        if not hrefs:
            # The cell has text but no link (e.g. a header or plain-text
            # cell); skip it rather than crash with IndexError.
            continue
        # urljoin resolves protocol-relative hrefs ('//host/path') to the
        # same 'http://host/path' as prefixing 'http:', and additionally
        # handles absolute and page-relative hrefs correctly.
        scorelink = urljoin(url, hrefs[0])
        print(scorelink)






